import re

# Sample HTML document used as input for the regex demonstrations below.
# NOTE: the list items spell the attribute "herf"; the patterns further down
# match that spelling exactly as it appears in this markup.
html = '''
<!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Document</title>
</head>

<body>
    <h1>标题1

    </h1>
    <h1>标题2

    </h1>
    <ul>
        <li herf="www.baidu.com/1">链接1</li>
        <li herf="www.baidu.com/2">链接2</li>
        <li herf="www.baidu.com/3">链接3</li>
        <li herf="www.baidu.com/4">链接4</li>
        <li herf="www.baidu.com/5">链接5</li>
    </ul>

</body>

</html>
'''

# Patterns are compiled once up front; each keeps its original regex text.
title_pattern = re.compile(r'<title>(.*?)</title>')
# DOTALL lets "." cross newlines, because each <h1> body spans several lines.
heading_pattern = re.compile(r'<h1>(.*?)</h1>', re.DOTALL)
link_pattern = re.compile(r'<li herf="(.*?)">(.*?)</li>')
url_pattern = re.compile(r'<li herf="\b([^"]+)"')

# 1. Page title: text captured between <title> and </title>.
title_match = title_pattern.search(html)
print(title_match.group(1))

# 2. First heading only, with the surrounding whitespace/newlines trimmed.
first_heading = heading_pattern.search(html)
print(first_heading.group(1).strip())

# 3. Every heading, one per line, trimmed the same way.
headings = heading_pattern.findall(html)
for heading in headings:
    print(heading.strip())

# 4. Each list item as an (attribute value, item text) pair.
results = link_pattern.findall(html)
for url, label in results:
    print(f'链接: {url}     标题: {label}')

# 5. Only the attribute values, printed as a plain list.
result = url_pattern.findall(html)
print(result)
